# Start from an empty workspace.
# NOTE(review): rm(list = ls()) only removes objects from the environment it
# is called in; it does not detach packages or reset options, and it wipes the
# caller's objects if this script is source()d from an interactive session.
rm(list = ls())

library(dplyr)
library(fst)
library(haven)

# Base data set that all election information below is merged onto (by PNR).
data_comp <- read.fst("data_comp.fst")

# Election years of the "kv" candidate files: 1993, 1997, ..., 2013.
years <- seq(from = 1993, to = 2013, by = 4)

# Merge the "kv" candidate file of every election year in `years` onto
# data_comp. Each pass appends four columns, suffixed with the year:
#   VALGT_JN_<year>, VALGTYPE_<year>, PARTI_<year>, run_kv_<year>
for (k in seq_along(years)) {
  year <- years[k]

  # Read this year's candidate file and keep the person id plus three election
  # fields. VALGT_JN becomes a logical (TRUE when the raw value is "J");
  # run_kv is an NA placeholder so that it is part of the names used for the
  # renaming step below.
  kv_path <- paste0("kv", year, "_recodes_pnr_afid.sas7bdat")
  kv_candidates <-
    read_sas(kv_path) %>%
    select("PNR", "VALGT_JN", "VALGTYPE", "PARTI") %>%
    mutate(VALGT_JN = VALGT_JN == "J",
           run_kv   = NA)

  # Left join keeps every row of data_comp; rows without a match in the
  # candidate file get NA in VALGT_JN, which is what run_kv is derived from.
  # NOTE(review): suppressWarnings() hides every warning raised by the join,
  # and a PNR occurring more than once in the candidate file would repeat the
  # matching data_comp rows -- confirm PNR is unique per file.
  suppressWarnings({
    data_comp <-
      data_comp %>%
      left_join(kv_candidates, by = "PNR") %>%
      mutate(run_kv = !is.na(VALGT_JN))
  })

  # The four joined columns are the last four of data_comp; tag them with the
  # election year (PNR, the first candidate column, is the join key and is
  # therefore skipped).
  n_cols <- ncol(data_comp)
  names(data_comp)[(n_cols - 3):n_cols] <-
    paste0(names(kv_candidates)[-1], "_", year)

  # Progress indicator.
  print(k)
}

# Per-row summaries of the "kv" election columns created by the loop above.
#   kv_n_elected      : number of the six elections with VALGT_JN_<year> TRUE
#                       (NA counts as 0)
#   kv_ever_elected   : kv_n_elected > 0
#   kv_n_run          : number of the six elections with run_kv_<year> TRUE
#   kv_ever_run       : kv_n_run > 0
#   elected_kv_<year> : VALGT_JN_<year> as 0/1, with NA turned into 0
# The counts use rowSums() over the column-bound flags so that every row gets
# its own total. sum(c(...)) inside mutate() on an ungrouped data frame would
# return one grand total over all rows and recycle it to every row, which
# also makes the "ever" flags identical for everyone.
data_comp <- 
  data_comp %>% 
  # sample_frac(0.1) %>% # uncomment to work on a 10% sample (for fitting)
  mutate(kv_n_elected = rowSums(cbind(VALGT_JN_1993, VALGT_JN_1997,
                                      VALGT_JN_2001, VALGT_JN_2005,
                                      VALGT_JN_2009, VALGT_JN_2013), 
                                na.rm = TRUE),
         kv_ever_elected = kv_n_elected > 0,
         # run_kv_<year> is built with !is.na() and so never contains NA
         kv_n_run = rowSums(cbind(run_kv_1993, run_kv_1997,
                                  run_kv_2001, run_kv_2005,
                                  run_kv_2009, run_kv_2013)),
         kv_ever_run = kv_n_run > 0,
         # single-column rowSums(..., na.rm = TRUE): TRUE -> 1, FALSE/NA -> 0
         elected_kv_1993 = rowSums(as.matrix(VALGT_JN_1993), na.rm = TRUE),
         elected_kv_1997 = rowSums(as.matrix(VALGT_JN_1997), na.rm = TRUE),
         elected_kv_2001 = rowSums(as.matrix(VALGT_JN_2001), na.rm = TRUE),
         elected_kv_2005 = rowSums(as.matrix(VALGT_JN_2005), na.rm = TRUE),
         elected_kv_2009 = rowSums(as.matrix(VALGT_JN_2009), na.rm = TRUE),
         elected_kv_2013 = rowSums(as.matrix(VALGT_JN_2013), na.rm = TRUE)) 

# Flag, for every year in `years`, whether a person's PNR occurs in that
# year's bef<year>.fst file (presumably the population file -- TODO confirm).
# Each pass appends one 0/1 column named in_year_<year>.
for (k in seq_along(years)) {
  year <- years[k]

  # One marker per PNR found in this year's file.
  present <-
    read.fst(paste0("bef", year, ".fst")) %>%
    select("PNR") %>%
    mutate(in_year = 1)

  # Unmatched rows get NA from the join; the single-column rowSums() with
  # na.rm = TRUE turns those NAs into 0 and leaves the 1s untouched.
  data_comp <-
    data_comp %>%
    left_join(present, by = "PNR") %>%
    mutate(in_year = rowSums(as.matrix(in_year), na.rm = TRUE))

  # The marker is the last column of data_comp; suffix its name with the year.
  last_col <- ncol(data_comp)
  names(data_comp)[last_col] <-
    paste0(names(data_comp)[last_col], "_", year)

  # Progress indicator.
  print(k)
}

# Find everyone ever running for parliament -------------------------------

# Election years of the "fv" candidate files.
years <- c(1990, 1994, 1998, 2001, 2005, 2007, 2011, 2015)

# Merge the "fv" candidate file of every election year onto data_comp. Each
# pass appends four columns, suffixed with the year and "_FV":
#   VALGT_JN_<year>_FV, VALGTYPE_<year>_FV, PARTI_<year>_FV, run_fv_<year>_FV
for (k in seq_along(years)) {
  year <- years[k]

  # Read this year's candidate file and keep the person id plus three election
  # fields. VALGT_JN becomes a logical (TRUE when the raw value is "J");
  # run_fv is an NA placeholder so that it is part of the names used for the
  # renaming step below.
  fv_path <- paste0("fv", year, "_recodes_pnr_afid.sas7bdat")
  fv_candidates <-
    read_sas(fv_path) %>%
    select("PNR", "VALGT_JN", "VALGTYPE", "PARTI") %>%
    mutate(VALGT_JN = VALGT_JN == "J",
           run_fv   = NA)

  # Left join keeps every row of data_comp; rows without a match in the
  # candidate file get NA in VALGT_JN, which is what run_fv is derived from.
  # NOTE(review): suppressWarnings() hides every warning raised by the join,
  # and a PNR occurring more than once in the candidate file would repeat the
  # matching data_comp rows -- confirm PNR is unique per file.
  suppressWarnings({
    data_comp <-
      data_comp %>%
      left_join(fv_candidates, by = "PNR") %>%
      mutate(run_fv = !is.na(VALGT_JN))
  })

  # The four joined columns are the last four of data_comp; tag them with the
  # election year and "_FV" (PNR, the join key, is skipped).
  n_cols <- ncol(data_comp)
  names(data_comp)[(n_cols - 3):n_cols] <-
    paste0(names(fv_candidates)[-1], "_", year, "_FV")

  # Progress indicator.
  print(k)
}


# Per-row summaries of the "fv" election columns created by the loop above.
#   fv_n_elected      : number of the eight elections with VALGT_JN_<year>_FV
#                       TRUE (NA counts as 0)
#   fv_ever_elected   : fv_n_elected > 0
#   fv_n_run          : number of the eight elections with run_fv_<year>_FV TRUE
#   fv_ever_run       : fv_n_run > 0
#   elected_fv_<year> : VALGT_JN_<year>_FV as 0/1, with NA turned into 0
# The counts use rowSums() over the column-bound flags so that every row gets
# its own total. sum(c(...)) inside mutate() on an ungrouped data frame would
# return one grand total over all rows and recycle it to every row, which
# also makes the "ever" flags identical for everyone.
data_comp <- 
  data_comp %>% 
  mutate(fv_n_elected = rowSums(cbind(VALGT_JN_1990_FV, VALGT_JN_1994_FV,
                                      VALGT_JN_1998_FV, VALGT_JN_2001_FV,
                                      VALGT_JN_2005_FV, VALGT_JN_2007_FV,
                                      VALGT_JN_2011_FV, VALGT_JN_2015_FV), 
                                na.rm = TRUE),
         fv_ever_elected = fv_n_elected > 0,
         # run_fv_<year>_FV is built with !is.na() and so never contains NA
         fv_n_run = rowSums(cbind(run_fv_1990_FV, run_fv_1994_FV,
                                  run_fv_1998_FV, run_fv_2001_FV,
                                  run_fv_2005_FV, run_fv_2007_FV,
                                  run_fv_2011_FV, run_fv_2015_FV)),
         fv_ever_run = fv_n_run > 0,
         # single-column rowSums(..., na.rm = TRUE): TRUE -> 1, FALSE/NA -> 0
         elected_fv_1990 = rowSums(as.matrix(VALGT_JN_1990_FV), na.rm = TRUE),
         elected_fv_1994 = rowSums(as.matrix(VALGT_JN_1994_FV), na.rm = TRUE),
         elected_fv_1998 = rowSums(as.matrix(VALGT_JN_1998_FV), na.rm = TRUE),
         elected_fv_2001 = rowSums(as.matrix(VALGT_JN_2001_FV), na.rm = TRUE),
         elected_fv_2005 = rowSums(as.matrix(VALGT_JN_2005_FV), na.rm = TRUE),
         elected_fv_2007 = rowSums(as.matrix(VALGT_JN_2007_FV), na.rm = TRUE),
         elected_fv_2011 = rowSums(as.matrix(VALGT_JN_2011_FV), na.rm = TRUE),
         elected_fv_2015 = rowSums(as.matrix(VALGT_JN_2015_FV), na.rm = TRUE)) 


# Flag, for every year in `years`, whether a person's PNR occurs in that
# year's bef<year>.fst file (presumably the population file -- TODO confirm).
# Each pass appends one 0/1 column named in_year_fv_<year>.
for (k in seq_along(years)) {
  year <- years[k]

  # One marker per PNR found in this year's file.
  present_fv <-
    read.fst(paste0("bef", year, ".fst")) %>%
    select("PNR") %>%
    mutate(in_year_fv = 1)

  # Unmatched rows get NA from the join; the single-column rowSums() with
  # na.rm = TRUE turns those NAs into 0 and leaves the 1s untouched.
  data_comp <-
    data_comp %>%
    left_join(present_fv, by = "PNR") %>%
    mutate(in_year_fv = rowSums(as.matrix(in_year_fv), na.rm = TRUE))

  # The marker is the last column of data_comp; suffix its name with the year.
  last_col <- ncol(data_comp)
  names(data_comp)[last_col] <-
    paste0(names(data_comp)[last_col], "_", year)

  # Progress indicator.
  print(k)
}

# Reduce data_comp to the columns kept in the final file, in this order.
data_comp <-
  select(
    data_comp,
    # identifiers / variables already present in data_comp
    "PNR", "inc_res", "four_years",
    # kv: ran in a given year
    "run_kv_1993", "run_kv_1997", "run_kv_2001",
    "run_kv_2005", "run_kv_2009", "run_kv_2013",
    # kv: summaries
    "kv_n_elected", "kv_ever_elected", "kv_n_run", "kv_ever_run",
    # kv: elected in a given year (0/1)
    "elected_kv_1993", "elected_kv_1997", "elected_kv_2001",
    "elected_kv_2005", "elected_kv_2009", "elected_kv_2013",
    # kv: present in the bef file of a given year (0/1)
    "in_year_1993", "in_year_1997", "in_year_2001",
    "in_year_2005", "in_year_2009", "in_year_2013",
    # fv: ran in a given year
    "run_fv_1990_FV", "run_fv_1994_FV", "run_fv_1998_FV", "run_fv_2001_FV",
    "run_fv_2005_FV", "run_fv_2007_FV", "run_fv_2011_FV", "run_fv_2015_FV",
    # fv: summaries
    "fv_n_elected", "fv_ever_elected", "fv_n_run", "fv_ever_run",
    # fv: elected in a given year (0/1)
    "elected_fv_1990", "elected_fv_1994", "elected_fv_1998", "elected_fv_2001",
    "elected_fv_2005", "elected_fv_2007", "elected_fv_2011", "elected_fv_2015",
    # fv: present in the bef file of a given year (0/1)
    "in_year_fv_1990", "in_year_fv_1994", "in_year_fv_1998", "in_year_fv_2001",
    "in_year_fv_2005", "in_year_fv_2007", "in_year_fv_2011", "in_year_fv_2015"
  )

# Drop rows where inc_res is missing, then store the finished data set.
data_comp <- filter(data_comp, !is.na(inc_res))

# Writes the single object `data_comp` to an .rdata file.
save(data_comp, file = "projects/who_becomes_dk/data/main_data.rdata")

